import os
import pandas as pd

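# Append patient entities to entity2id.txt (disabled: the snippet is kept below as an unused string literal)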
'''
p_list = []
p_id = 1
for i in range(145, 1704):
    # print("患者" + str(p_id) + "\t" + str(i))
    p_list.append("患者" + str(p_id) + "\t" + str(i) + "\n")
    p_id += 1
# print(p_list)

with open("entity2id.txt", "a", encoding="utf-8") as f:
    for p in p_list:
        f.write(p)
    f.close()
'''

# Entity table: one [field0, field1] list per usable line of entity2id.txt.
# Presumably field0 is the entity name and field1 its id (per the file name);
# later code matches on field0 and emits field1.
zhenzhuangs = []
# Per-record id lists, filled in further down the script.
triples = []
with open("../医学知识/entity2id.txt", "r", encoding="utf-8") as entity_file:
    for raw_line in entity_file:
        fields = raw_line.split()
        # Lines with fewer than two whitespace-separated fields are ignored.
        if len(fields) < 2:
            continue
        zhenzhuangs.append(fields[:2])


def _split_cell(value):
    """Split a comma-separated spreadsheet cell into stripped, non-empty items.

    Empty cells are read by pandas as NaN (a float), which has no ``split``
    method; they are treated as "no items" instead of crashing the script.
    """
    if not isinstance(value, str):
        if pd.isna(value):
            return []
        value = str(value)
    stripped = (item.strip() for item in value.split(','))
    return [item for item in stripped if item]


# Index the entity table by its first field so each lookup is O(1).
# Every second field registered under the same first field is kept, in file
# order, which matches what a full scan of the table would have appended.
entity_ids_by_name = {}
for entity_name, entity_id in zhenzhuangs:
    entity_ids_by_name.setdefault(entity_name, []).append(entity_id)

record_dir = "./病历"
# os.listdir returns entries in arbitrary order; sort so the output file is
# reproducible from run to run and machine to machine.
dirs = sorted(os.listdir(record_dir))

for file_name in dirs:
    df = pd.read_excel(os.path.join(record_dir, file_name))
    for _, row in df.iterrows():
        # De-duplicate symptoms while keeping first-seen order (a set would
        # make the order depend on string-hash randomisation).
        symptoms = list(dict.fromkeys(_split_cell(row['症状'])))
        # Only the first listed diagnosis is used, and it always goes last.
        first_diagnosis = _split_cell(row['诊断'])[:1]

        triple = []
        for name in symptoms + first_diagnosis:
            # Names that are absent from the entity table contribute nothing.
            triple.extend(entity_ids_by_name.get(name, []))
        triples.append(triple)

# Write the result file: first line is the number of records, then one
# tab-separated line of ids per record.
# The context manager guarantees the file is flushed and closed.
with open("prediction.txt", "w", encoding="utf-8") as out_file:
    out_file.write(str(len(triples)) + "\n")
    # A record with no matched ids becomes an empty line, so the line count
    # still agrees with the header (indexing its last element would raise).
    out_file.writelines(
        "\t".join(str(entity_id) for entity_id in triple) + "\n"
        for triple in triples
    )
